A set of log metrics was computed and inserted into the database at the time logs were selected (original untrimmed length) and again at the time they were sawn. At sawing, metrics for some logs were missed because those logs were processed before the selection app had been set running.
Bin files were collected for all selected logs from ROY scanner (see KEEPBINFILES)
Bin files were collected from all true shape scanners at time of sawing:
- ORI - log scan before turner and debarker; not necessarily in the same sequence as sawn
- ROY - log scan before trideck
- ELI - log scan after trideck
- HSS - cant scan
Shape scans at Hyne are all SE first, except for the ORI scanner (well duh), and some logs on the main chain (ML thinks only small diameter logs not worth turning around)
Code that flipped logs that appeared to be presented LE first was enabled during the selection phase, but not the sawing phase.
library(lattice)
# 1. metrics in the db already
library(RODBC)
# Open the 'Hyne' ODBC data source and fetch every row of `logs` that carries
# an SWILogNumber; the query itself adds the seq, scanner and source columns.
# NOTE(review): the login is hard-coded in this script - consider moving it
# out of the source.
ch <- odbcConnect('Hyne', uid = 'sa', pwd = "password12")
DB <- sqlQuery(
  ch,
  "select *, seq=ID, scanner='ROY', source='db' from logs where SWILogNumber is not null"
)
# Rows dated before 2014-08-31 are labelled 'untrimmed', rows dated after it
# 'trimmed'; the label is then stored as an ordered factor.
DB$trimstate[DB$DateAndTime < as.POSIXct('2014-08-31')] <- 'untrimmed'
DB$trimstate[DB$DateAndTime > as.POSIXct('2014-08-31')] <- 'trimmed'
DB$trimstate <- factor(
  DB$trimstate,
  levels = c('untrimmed', 'trimmed'),
  ordered = TRUE
)
# 2. freshly computed results from logs.csv file
CSV <- read.csv('/home/harrinjj/G/Projects/Hyne/USNR/logs.csv')
# Capture group 1 is the directory directly below 'USNR Image Files/';
# group 2 is the file stem without an optional 'tri_cam' prefix and without
# the '.bin' extension.
re <- "(?:.*USNR Image Files/)([^/]+)/(?:tri_cam)?(.+)\\.bin"
CSV$scanner <- gsub(re, "\\1", CSV$binfn, perl = TRUE)
CSV$bindir <- CSV$scanner
CSV$seq <- as.numeric(gsub(re, "\\2", CSV$binfn, perl = TRUE))
# SWI log numbers of the logs dated after 2014-08-31, ordered by DateAndTime.
swi.log.numbers.in.sawing.order <- sqlQuery(
  ch,
  "select SWILogNumber as l from logs where SWILogNumber is not null and DateAndTime>'2014-08-31' order by DateAndTime"
)$l
# deal with the 8 non-trial logs that were on the deck
ii <- CSV$scanner %in% c('HSS', 'ELI')
CSV$seq[ii] <- CSV$seq[ii] - 8
#ii=CSV$scanner%in%c('ORI','ELI','HSS')&CSV$seq>0
# After the shift, a positive seq is used as a position in the sawing order.
ii <- CSV$scanner %in% c('ELI', 'HSS') & CSV$seq > 0
CSV$SWILogNumber[ii] <- swi.log.numbers.in.sawing.order[CSV$seq[ii]]
CSV$source <- 'binfiles'
#library(hash)
H <- sqlQuery(ch, "select ID, SWILogNumber from logs where SWILogNumber is not null")#" and DateAndTime<'2014-08-31'")
#H = hash(keys=as.numeric(H$ID), values=as.numeric(H$SWILogNumber))
# HH is a plain vector used as a lookup table: position = database ID,
# value = SWILogNumber (positions without a trial log stay NA).
HH <- c()
HH[H$ID] <- H$SWILogNumber
# Sequence numbers above 500 are looked up in HH as database IDs; rows whose
# ID has no entry are left untouched.
for (ID in unique(CSV$seq[CSV$seq > 500])) {
  if (is.na(HH[ID])) next
  CSV$SWILogNumber[CSV$seq == ID] <- HH[ID]
}
# old way: CSV$SWILogNumber[CSV$seq=='119902'] = DB$SWILogNumber[DB$ID=='119902']
#CSV$SWILogNumber[CSV$seq=='119808'] = DB$SWILogNumber[DB$ID=='119808']
#summary(CSV$SWILogNumber)
#CSV$binfn[is.na(CSV$SWILogNumber)]
# now merge DB and CSV
# drop stuff other than log metrics and strip the leading 'm_'
# The pattern is anchored with '^' so that only columns whose name *starts*
# with 'm_' are treated as metrics, and only that leading prefix is removed.
# (Unanchored, any column merely containing 'm_' would be selected and have
# the inner 'm_' deleted from its name.)
db.cols <- c(
  c("SWILogNumber", "scanner", "trimstate", "source", "seq"),
  grep('^m_', names(DB), perl = TRUE, value = TRUE)
)
DB2 <- DB[, db.cols]
names(DB2) <- sub('^m_', '', db.cols)
# Stack two data frames whose column sets differ.
#   x, y : data frames.
# Each frame first receives the columns that only the other one has, filled
# with NA; the padded frames are then combined with rbind().
# Returns the row-bound data frame.
rbind.all.columns <- function(x, y) {
  only.in.x <- setdiff(colnames(x), colnames(y))
  only.in.y <- setdiff(colnames(y), colnames(x))
  x[, as.character(only.in.y)] <- NA
  y[, as.character(only.in.x)] <- NA
  rbind(x, y)
}
# Combine the database metrics and the binfile metrics into one table L.
L <- rbind.all.columns(DB2, CSV)
L$SWILogNumber <- factor(L$SWILogNumber, ordered = TRUE)
# Scans found under the KEEPBINFILES directory are relabelled as ROY scans.
L$scanner[L$scanner == 'KEEPBINFILES'] <- 'ROY'
L$scanner <- factor(
  L$scanner,
  levels = c('ORI', 'ROY', 'ELI', 'HSS'),
  ordered = TRUE
)
L <- L[!is.na(L$SWILogNumber), ] # drop non-trial logs
L <- L[L$scanner != 'HSS', ] # drop cant scans
# Every remaining column that is not an identifier/bookkeeping column is
# treated as a log metric.
log.metrics <- setdiff(
  names(L),
  c("SWILogNumber", "scanner", "trimstate", "source", "seq", "fail",
    "origOrient", "nslices", "dzmax", "dzmin", "binfn", "bindir")
)
Logs with unusual numbers of shape metric sets?
# Count the rows of L per log and show the logs whose count is not 5.
t <- table(L$SWILogNumber)
t[t != 5]
##
## 140 150
## 6 6
# Bin file and source of every metric set held for log 141.
L[L$SWILogNumber == 141, c('binfn', 'source')]
## binfn
## 37 <NA>
## 241 <NA>
## 364 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/91201.bin
## 487 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/91201.bin
## 648 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/ELI/tri_cam126.bin
## source
## 37 db
## 241 db
## 364 binfiles
## 487 binfiles
## 648 binfiles
# Bin file and source of every metric set held for log 140.
L[L$SWILogNumber == 140, c('binfn', 'source')]
## binfn
## 38 <NA>
## 227 <NA>
## 350 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/91203.bin
## 473 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/91203.bin
## 617 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/ROY/119902.bin
## 633 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/ELI/tri_cam112.bin
## source
## 38 db
## 227 db
## 350 binfiles
## 473 binfiles
## 617 binfiles
## 633 binfiles
# Bin file and source of every metric set held for log 150.
L[L$SWILogNumber == 150, c('binfn', 'source')]
## binfn
## 57 <NA>
## 133 <NA>
## 256 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/92583.bin
## 379 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/KEEPBINFILES/92583.bin
## 616 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/ROY/119808.bin
## 659 /media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/ELI/tri_cam18.bin
## source
## 57 db
## 133 db
## 256 binfiles
## 379 binfiles
## 616 binfiles
## 659 binfiles
ROY scanner binfiles for logs 140 and 150 were not lost.
The ELI scanner binfile for log 141 (ELI/tri_cam126.bin) has issues.
Numbers of metric sets?
# Cross-tabulate the rows of L by trim state, scanner and source.
table(L$trimstate, L$scanner, L$source)
## , , = db
##
##
## ORI ROY ELI HSS
## untrimmed 0 123 0 0
## trimmed 0 123 0 0
##
## , , = binfiles
##
##
## ORI ROY ELI HSS
## untrimmed 0 123 0 0
## trimmed 0 125 123 0
Do the two recomputed sets of ROY metrics match those computed at sawing time?
# Identifier columns followed by all log metrics; printed transposed so that
# each metric set of log 140 becomes one column.
cols <- c('scanner', 'source', 'trimstate', 'bindir', 'seq', log.metrics)
t(L[L$SWILogNumber == 140, cols])
## 38 227 350
## scanner "ROY" "ROY" "ROY"
## source "db" "db" "binfiles"
## trimstate "untrimmed" "trimmed" "untrimmed"
## bindir NA NA "KEEPBINFILES"
## seq " 91203" "119902" " 91203"
## length "6076.139" "4826.000" "6076.139"
## volume "0.4732999" "0.3491301" "0.4732999"
## led "397.1552" "327.3679" "397.1552"
## sed "302.1312" "301.8128" "302.1312"
## a0 "296.6347" "291.5422" "296.6347"
## a1 "-18.61085" " 12.85245" "-18.61085"
## a2 "91.17738" "28.88385" "91.17738"
## taper "11.942869" " 8.648218" "11.942869"
## waist "0.6174072" "0.3100419" "0.6174072"
## ovality "9.118804e-05" "5.305785e-05" "9.118804e-05"
## whorliness "1.1883257" "1.0611178" "1.1883257"
## sweep1 "0.6748855" "0.6149826" "0.6748855"
## sweep2 "0.2116119" "0.2390790" "0.2116119"
## gaxis_maxdev NA NA "24.91641"
## gaxis_volume NA NA "1182793.0"
## sd_xsectnarea_butthalf NA NA "12484.380"
## wobble NA NA "3.735551"
## wobble_x NA NA "6.307136"
## wobble_y NA NA "6.238595"
## 473 617 633
## scanner "ROY" "ROY" "ELI"
## source "binfiles" "binfiles" "binfiles"
## trimstate "trimmed" "trimmed" "trimmed"
## bindir "KEEPBINFILES" "ROY" "ELI"
## seq " 91203" "119902" " 104"
## length "4907.644" "4826.000" "4572.000"
## volume "0.3430806" "0.3491301" "0.3186620"
## led "332.5861" "327.3679" "328.7093"
## sed "302.1312" "301.8128" "284.4171"
## a0 "292.8333" "291.5422" "284.5500"
## a1 " 4.26992" " 12.85245" " 18.90512"
## a2 "40.18010" "28.88385" "21.28797"
## taper " 9.057304" " 8.648218" " 8.791140"
## waist "0.4170661" "0.3100419" "0.2546021"
## ovality "1.445811e-05" "5.305785e-05" "5.458508e-05"
## whorliness "1.1509781" "1.0611178" "0.9380907"
## sweep1 "0.9134273" "0.6149826" "0.9181781"
## sweep2 "0.1974636" "0.2390790" "0.2364718"
## gaxis_maxdev "21.99987" "14.32311" "19.19285"
## gaxis_volume " 513483.2" " 384893.8" " 433747.9"
## sd_xsectnarea_butthalf " 7424.744" " 7006.620" " 7013.027"
## wobble "3.060341" "2.227319" "2.280054"
## wobble_x "4.807097" "4.290384" "5.076679"
## wobble_y "5.769047" "4.837054" "4.598366"
# Same transposed view for log 150.
t(L[L$SWILogNumber == 150, cols])
## 57 133 256
## scanner "ROY" "ROY" "ROY"
## source "db" "db" "binfiles"
## trimstate "untrimmed" "trimmed" "untrimmed"
## bindir NA NA "KEEPBINFILES"
## seq " 92583" "119808" " 92583"
## length "4724.400" "4724.400" "4724.400"
## volume "0.9785541" "0.9746568" "0.9785541"
## led "547.3299" "542.7364" "547.3299"
## sed "510.3448" "508.4683" "510.3448"
## a0 "499.5402" "498.2023" "499.5402"
## a1 "22.85488" "28.98543" "22.85488"
## a2 "30.34987" "22.05943" "30.34987"
## taper "11.26169" "10.80452" "11.26169"
## waist "0.3399409" "0.2470820" "0.3399409"
## ovality "0.0003671312" "0.0005038186" "0.0003671312"
## whorliness "1.125857" "1.019570" "1.125857"
## sweep1 "2.043897" "1.787210" "2.043897"
## sweep2 "0.5922525" "0.6586372" "0.5922525"
## gaxis_maxdev NA NA "45.61970"
## gaxis_volume NA NA "2849045"
## sd_xsectnarea_butthalf NA NA "14339.96"
## wobble NA NA "6.342098"
## wobble_x NA NA "12.46894"
## wobble_y NA NA "11.21478"
## 379 616 659
## scanner "ROY" "ROY" "ELI"
## source "binfiles" "binfiles" "binfiles"
## trimstate "trimmed" "trimmed" "trimmed"
## bindir "KEEPBINFILES" "ROY" "ELI"
## seq " 92583" "119808" " 10"
## length "4856.572" "4724.400" "4857.212"
## volume "0.9542162" "0.9746568" "0.9728274"
## led "557.1632" "542.7364" "534.6488"
## sed "510.3448" "508.4683" "503.1886"
## a0 "499.4897" "498.2023" "492.0899"
## a1 "18.00893" "28.98543" "16.54381"
## a2 "36.43158" "22.05943" "32.95395"
## taper "11.20966" "10.80452" "10.19057"
## waist "0.3861521" "0.2470820" "0.3491993"
## ovality "0.0003639022" "0.0005038186" "0.0003315628"
## whorliness "1.143183" "1.019570" "0.892767"
## sweep1 "2.191713" "1.787210" "1.816740"
## sweep2 "0.4620330" "0.6586372" "0.5351630"
## gaxis_maxdev "51.69439" "39.89044" "42.86146"
## gaxis_volume "2735936" "2770480" "2628537"
## sd_xsectnarea_butthalf "14144.61" "13459.59" "12639.84"
## wobble "6.251340" "5.232295" "5.510685"
## wobble_x "12.34018" "12.45737" "10.76491"
## wobble_y "10.81060" "11.70114" "12.18194"
Conclusions:
How do log metrics from the different scanners and processing paths compare?
library(reshape2)
# flipped logs
flipped <- c(208, 207, 203, 210, 147, 202, 149, 155, 168, 200, 209, 212, 196,
             199, 127, 158, 169, 181, 177, 166, 151, 182, 179, 101, 178, 106,
             175, 124, 172, 104, 187, 117, 142, 114, 113)

# Scatter-plot matrix of one log metric across scanner/source combinations,
# restricted to one trim state.
#   trimstate : trim state to keep ('untrimmed' or 'trimmed').
#   prop      : name of the metric column of L to plot.
# Reads the globals L and flipped. Returns the wide table (one row per
# SWILogNumber, one column per scanner_source combination, cell = mean of
# prop) that was plotted.
myplot <- function(trimstate, prop) {
  # %in% never yields NA, so rows with a missing trim state are dropped
  # instead of being turned into all-NA rows by the subsetting.
  keep <- L$trimstate %in% trimstate & !is.na(L[, prop])
  LL <- dcast(L[keep, ], 'SWILogNumber ~ scanner + source', mean,
              value.var = prop)
  # Drop the SWILogNumber column by position; drop = FALSE keeps a data frame
  # even when only one value column exists. The grouping argument is spelled
  # `groups`, as documented by lattice, rather than the abbreviated `group`.
  try(
    print(splom(LL[, -1, drop = FALSE],
                groups = LL$SWILogNumber %in% flipped,
                main = prop)),
    silent = FALSE
  )
  return(LL)
}
# A failing metric is reported by try() and does not stop the loop.
for (p in log.metrics) {
  try(LL <- myplot('untrimmed', p), silent = FALSE)
}
for (p in log.metrics) {
  try(LL <- myplot('trimmed', p), silent = FALSE)
}
# Redefinition of myplot taking only the metric name: the scatter-plot matrix
# now has one panel variable per scanner/source/trimstate combination.
#   prop : name of the metric column of L to plot.
# Reads the globals L and flipped. Returns the wide table (one row per
# SWILogNumber, cell = mean of prop) that was plotted.
myplot <- function(prop) {
  LL <- dcast(L[!is.na(L[, prop]), ],
              'SWILogNumber ~ scanner + source + trimstate', mean,
              value.var = prop)
  # Drop the SWILogNumber column by position; drop = FALSE keeps a data frame
  # even when only one value column exists. The grouping argument is spelled
  # `groups`, as documented by lattice, rather than the abbreviated `group`.
  try(
    print(splom(LL[, -1, drop = FALSE],
                groups = LL$SWILogNumber %in% flipped,
                main = prop)),
    silent = FALSE
  )
  return(LL)
}
# A failing metric is reported by try() and does not stop the loop.
for (p in log.metrics) {
  try(LL <- myplot(p), silent = FALSE)
}
Plot all the xsectn data, sorted by some log metric.
# Load the cross-section data and order it by distance from the large end.
X <- read.csv('/home/harrinjj/G/Projects/Hyne/USNR/xsectns.csv')
X <- X[order(X$DistanceFromLE), ]
# Capture group 1 is the directory directly below 'USNR Image Files/';
# group 2 is the file stem without an optional 'tri_cam' prefix and without
# the '.bin' extension.
re <- "(?:.*USNR Image Files/)([^/]+)/(?:tri_cam)?(.+)\\.bin"
X$scanner <- gsub(re, "\\1", X$binfn, perl = TRUE)
X$bindir <- X$scanner
X$seq <- as.numeric(gsub(re, "\\2", X$binfn, perl = TRUE))
# deal with the 8 non-trial logs that were on the deck
ii <- X$scanner %in% c('HSS', 'ELI')
X$seq[ii] <- X$seq[ii] - 8
#ii=X$scanner%in%c('ORI','ELI','HSS')&X$seq>0
# After the shift, a positive seq is used as a position in the sawing order.
ii <- X$scanner %in% c('ELI', 'HSS') & X$seq > 0
X$SWILogNumber[ii] <- swi.log.numbers.in.sawing.order[X$seq[ii]]
# Sequence numbers above 500 are looked up in HH as database IDs; rows whose
# ID has no entry are left untouched.
for (ID in unique(X$seq[X$seq > 500])) {
  if (is.na(HH[ID])) next
  X$SWILogNumber[X$seq == ID] <- HH[ID]
}
# shorten the binfn
X$binfn <- gsub(
  '/media/Q/SWI/LogsAndStems/Hyne_2014/Aug2014/USNR Image Files/',
  '',
  X$binfn
)
# drop HSS binfiles - these are CANTS not logs
X <- X[X$scanner %in% 'ELI', ]
# drop the first 8 logs...not part of trial
X <- X[X$seq > 0, ]
# Order the panels by crook_avg taken from the tmp_logquality table.
LL <- sqlQuery(ch, 'select * from tmp_logquality')
X$SWILogNumber <- factor(
  X$SWILogNumber,
  levels = LL$SWILogNumber[order(LL$crook_avg)],
  ordered = TRUE
)
# One line plot per remaining column of X against DistanceFromLE, with one
# panel per log.
for (p in setdiff(
  names(X),
  c('binfn', 'trimstate', 'DistanceFromLE', 'scanner', 'seq', 'SWILogNumber',
    'bindir')
)) {
  print(xyplot(
    X[,p] ~ DistanceFromLE | SWILogNumber, X, main = p, type = "l"
    #.strip=TRUE, subset=X$binfn%in%sample(binfns,100), layout=c(10,10,1)
  ))
}
Perhaps our selection strategy (looking for wide ranges in things like ovality) has inadvertently led to un-robust shapes. E.g. a log with a flapping broken bit looks weird to the scanner and appears ‘interesting’ to the selection software.
In prediction models, try using log metrics recomputed from binfiles from (DONE):
- sawing phase ELI scanner
# Install DT from GitHub only when it is not available, then attach it.
# (With require() alone, a fresh install was never attached, so the
# datatable() call below failed on the first run.)
if (!requireNamespace("DT", quietly = TRUE)) devtools::install_github("rstudio/DT")
library(DT)
## Loading required package: DT
# Signature of DT::datatable, kept for reference:
#datatable(data, options = list(), class = "display", callback = JS("return table;"), rownames, colnames, container, caption = NULL, filter = c("none", "bottom", "top"), server = FALSE, escape = TRUE, style = "default", extensions = list())
# Show the merged metrics table L with DT's datatable(), all options at their defaults.
datatable(L)